############################################################################
### Replication of Skorge, O.S. (2021). Mobilizing the Underrepresented: ###
### Electoral Systems and Gender Inequality in Political Participation,  ###
### AJPS, DOI: 10.1111/ajps.12654                                        ###
############################################################################


### This script creates the dataset for the replication. It is based
### on the dataverse files provided by Skorge (2021), modified to create
### the Gender Turnout Gap, which is used as the dependent variable in
### the alternative specification of the analyses.


#### DATA ####

# Build the working data.table from the election data object.
dt <- data.table(dt.elections)

# maj_code is missing for municipality 1818 (Herøy) in 1916; the value 1 is
# filled in from Statistics Norway (1917: 25),
# https://www.ssb.no/a/histstat/nos/nos_vi_110.pdf
dt[, maj_code := ifelse(test = year == 1916 & muni == 1818,
                        yes = 1,
                        no = maj_code)]

# 1931: data exist for cities only; all other municipalities carry 0 on
# every variable. Flag the rows with zero eligible voters in total and keep
# only the unflagged ones (rows whose flag is NA are dropped as well).
dt[, rm1931 := ifelse(test = year == 1931 & elVoters_T == 0,
                      yes = 1,
                      no = 0)]
dt <- dt[rm1931 == 0]

# 1934: same treatment, flagged through zero eligible women.
dt[, rm1934 := ifelse(test = year == 1934 & elVoters_W == 0,
                      yes = 1,
                      no = 0)]
dt <- dt[rm1934 == 0]


#### REGIONS/COUNTIES ####

#### Municipality names (names in 1919) ####

# Create `muniName` by recoding the municipality code `muni` into a name.
# The mapping is passed to recode() as one long string of `code='Name'`
# pairs separated by semicolons; per the section header these are the
# municipality names in use in 1919.
# Several names occur under more than one code (e.g. 'Nes' for 236, 411,
# 616, 1043 and 1628), so `muniName` alone does not identify a municipality;
# use it together with `muni`.
# NOTE(review): the `old='new';` spec looks like car::recode() syntax --
# confirm that `recode` is not masked by dplyr::recode() when this runs.
# NOTE(review): codes not listed below are presumably passed through
# unchanged -- verify against the recode() implementation in use.
dt[, muniName := recode(muni,
                        "101='Fredrikshald';
                        102='Sarpsborg';
                        103='Fredrikstad';
                        104='Moss';
                        111='Hvaler';
                        112='Torsnes';
                        113='Borge';
                        114='Varteig';
                        115='Skjeberg';
                        116='Berg';
                        117='Idd';
                        118='Aremark';
                        119='Øymark';
                        120='Rødenes';
                        121='Rømskog';
                        122='Trøgstad';
                        123='Spydeberg';
                        124='Askim';
                        125='Eidsberg';
                        127='Skiptvet';
                        128='Rakkestad';
                        129='Degernes';
                        130='Tune';
                        131='Rolvsøy';
                        132='Glemmen';
                        133='Kråkerøy';
                        134='Onsøy';
                        135='Råde';
                        136='Rygge';
                        137='Våler';
                        138='Hobøl';
                        194='Jeløy';
                        201='Son';
                        203='Drøbak';
                        204='Hølen';
                        211='Vestby';
                        212='Kråkstad';
                        214='Ås';
                        215='Frogn';
                        216='Nesodden';
                        217='Oppegård';
                        218='Aker';
                        219='Bærum';
                        220='Asker';
                        221='Søndre Høland';
                        223='Setskog';
                        224='Aurskog';
                        225='Blaker';
                        226='Sørum';
                        227='Fet';
                        229='Enebakk';
                        230='Lørenskog';
                        231='Skedsmo';
                        232='Lillestrøm';
                        233='Nittedal';
                        234='Gjerdrum';
                        235='Ullensaker';
                        236='Nes';
                        237='Eidsvoll';
                        238='Nannestad';
                        239='Hurdal';
                        240='Feiring';
                        301='Kristiania';
                        401='Hamar';
                        402='Kongsvinger';
                        411='Nes';
                        412='Ringsaker';
                        413='Furnes';
                        414='Vang';
                        415='Løten';
                        416='Romedal';
                        417='Stange';
                        418='Nord-Odal';
                        419='Sør-Odal';
                        420='Eidskog';
                        421='Vinger';
                        422='Brandval';
                        423='Grue';
                        424='Hof';
                        425='Åsnes';
                        426='Våler';
                        427='Elverum';
                        428='Trysil';
                        429='Åmot';
                        430='Stor-Elvdal';
                        431='Sollia';
                        432='Ytre Rendal';
                        433='Øvre Rendal';
                        434='Engerdal';
                        436='Tolga';
                        437='Tynset';
                        438='Alvdal';
                        439='Folldal';
                        440='Kvikne';
                        501='Lillehammer';
                        502='Gjøvik';
                        511='Dovre';
                        512='Lesja';
                        513='Skjåk';
                        514='Lom';
                        515='Vågå';
                        516='Heidal';
                        517='Sel';
                        518='Nord-Fron';
                        519='Sør-Fron';
                        520='Ringebu';
                        521='Øyer';
                        522='Østre Gausdal';
                        523='Vestre Gausdal';
                        524='Fåberg';
                        525='Biri';
                        526='Snertingdal';
                        527='Vardal';
                        528='Østre Toten';
                        529='Vestre Toten';
                        530='Eina';
                        531='Kolbu';
                        532='Jevnaker';
                        533='Lunner';
                        534='Gran';
                        535='Brandbu';
                        536='Søndre Land';
                        537='Fluberg';
                        538='Nordre Land';
                        539='Torpa';
                        540='Sør-Aurdal';
                        541='Etnedal';
                        542='Nord-Aurdal';
                        543='Vestre Slidre';
                        544='Øystre Slidre';
                        545='Vang';
                        601='Hønefoss';
                        602='Drammen';
                        604='Kongsberg';
                        611='Tyristrand';
                        612='Hole';
                        613='Norderhov';
                        614='Ådal';
                        615='Flå';
                        616='Nes';
                        617='Gol';
                        618='Hemsedal';
                        619='Ål';
                        620='Hol';
                        621='Sigdal';
                        622='Krødsherad';
                        623='Modum';
                        624='Øvre Eiker';
                        625='Nedre Eiker';
                        626='Lier';
                        627='Røyken';
                        628='Hurum';
                        629='Ytre Sandsvær';
                        630='Øvre Sandsvær';
                        631='Flesberg';
                        632='Rollag';
                        633='Nore';
                        634='Opdal';
                        701='Svelvik';
                        702='Holmestrand';
                        703='Horten';
                        704='Åsgårdstrand';
                        705='Tønsberg';
                        706='Sandefjord';
                        707='Larvik';
                        711='Strømmen';
                        712='Skoger';
                        713='Sande';
                        714='Hof';
                        715='Botne';
                        716='Våle';
                        717='Borre';
                        718='Ramnes';
                        719='Andebu';
                        720='Stokke';
                        721='Sem';
                        722='Nøtterøy';
                        723='Tjøme';
                        724='Sandar';
                        725='Tjølling';
                        726='Brunlanes';
                        727='Hedrum';
                        728='Lardal';
                        798='Fredriksværn';
                        801='Kragerø';
                        802='Langesund';
                        803='Stathelle';
                        804='Brevik';
                        805='Porsgrunn';
                        806='Skien';
                        807='Notodden';
                        811='Siljan';
                        812='Gjerpen';
                        813='Eidanger';
                        814='Bamble';
                        815='Skåtøy';
                        816='Sannidal';
                        817='Drangedal';
                        818='Solum';
                        819='Holla';
                        820='Lunde';
                        821='Bø';
                        822='Sauherad';
                        823='Heddal';
                        824='Gransherad';
                        825='Hovin';
                        826='Tinn';
                        827='Hjartdal';
                        828='Seljord';
                        829='Kviteseid';
                        830='Nissedal';
                        831='Fyresdal';
                        832='Mo';
                        833='Lårdal';
                        834='Vinje';
                        835='Rauland';
                        901='Risør';
                        902='Tvedestrand';
                        903='Arendal';
                        904='Grimstad';
                        905='Lillesand';
                        911='Gjerstad';
                        912='Vegårshei';
                        913='Søndeled';
                        914='Holt';
                        915='Dypvåg';
                        916='Flosta';
                        917='Stokken';
                        918='Austre Moland';
                        919='Froland';
                        920='Øyestad';
                        921='Tromøy';
                        922='Hisøy';
                        923='Fjære';
                        924='Landvik';
                        925='Eide';
                        926='Vestre Moland';
                        927='Høvåg';
                        928='Birkenes';
                        929='Åmli';
                        930='Gjøvdal';
                        931='Tovdal';
                        932='Mykland';
                        933='Herefoss';
                        934='Vegusdal';
                        935='Iveland';
                        936='Hornnes';
                        937='Evje';
                        938='Bygland';
                        939='Hylestad';
                        940='Valle';
                        941='Bykle';
                        1001='Kristiansand';
                        1002='Mandal';
                        1003='Farsund';
                        1004='Flekkefjord';
                        1011='Randesund';
                        1012='Oddernes';
                        1013='Tveit';
                        1014='Vennesla';
                        1015='Hægeland';
                        1016='Øvrebø';
                        1017='Greipstad';
                        1018='Søgne';
                        1019='Halse og Harkmark';
                        1020='Holum';
                        1021='Øyslebø';
                        1022='Laudal';
                        1023='Finsland';
                        1024='Bjelland';
                        1025='Grindheim';
                        1026='Åseral';
                        1027='Konsmo';
                        1028='Vigmostad';
                        1029='Sør-Audnedal';
                        1030='Spangereid';
                        1031='Austad';
                        1032='Lyngdal';
                        1033='Kvås';
                        1034='Hægebostad';
                        1035='Eiken';
                        1036='Fjotland';
                        1037='Liknes';
                        1038='Feda';
                        1039='Herad';
                        1040='Spind';
                        1041='Lista';
                        1042='Hidra';
                        1043='Nes';
                        1044='Gyland';
                        1045='Bakke';
                        1046='Tonstad';
                        1047='Øvre Sirdal';
                        1101='Egersund';
                        1102='Sandnes';
                        1103='Stavanger';
                        1104='Skudeneshavn';
                        1105='Kopervik';
                        1106='Haugesund';
                        1107='Sogndal Ladested';
                        1111='Sokndal';
                        1112='Lund';
                        1113='Heskestad';
                        1114='Bjerkreim';
                        1115='Helleland';
                        1116='Eigersund';
                        1117='Ogna';
                        1118='Varhaug';
                        1119='Nærbø';
                        1120='Klepp';
                        1121='Time';
                        1122='Gjestal';
                        1123='Høyland';
                        1124='Haaland';
                        1126='Hetland';
                        1128='Høle';
                        1129='Forsand';
                        1130='Strand';
                        1131='Årdal';
                        1132='Fister';
                        1133='Hjelmeland';
                        1134='Suldal';
                        1135='Sauda';
                        1136='Sand';
                        1137='Erfjord';
                        1138='Jelsa';
                        1139='Nedstrand';
                        1140='Sjernarøy';
                        1141='Finnøy';
                        1142='Rennesøy';
                        1143='Mosterøy';
                        1145='Bokn';
                        1146='Tysvær';
                        1147='Avaldsnes';
                        1148='Stangaland';
                        1149='Åkra';
                        1150='Skudenes';
                        1152='Torvastad';
                        1153='Skåre';
                        1154='Skjold';
                        1155='Vats';
                        1157='Vikedal';
                        1211='Etne';
                        1212='Skånevik';
                        1213='Fjelberg';
                        1214='Ølen';
                        1215='Vikebygd';
                        1216='Sveio';
                        1217='Valestrand';
                        1218='Moster';
                        1219='Bømlo';
                        1220='Bremnes';
                        1221='Stord';
                        1222='Fitjar';
                        1223='Tysnes';
                        1224='Kvinnherad';
                        1225='Varaldsøy';
                        1226='Strandebarm';
                        1227='Jondal';
                        1228='Odda';
                        1229='Røldal';
                        1230='Ullensvang';
                        1231='Kinsarvik';
                        1232='Eidfjord';
                        1233='Ulvik';
                        1234='Granvin';
                        1235='Voss';
                        1236='Vossestrand';
                        1237='Evanger';
                        1238='Kvam';
                        1239='Hålandsdal';
                        1240='Strandvik';
                        1241='Fusa';
                        1242='Samnanger';
                        1243='Os';
                        1244='Austevoll';
                        1245='Sund';
                        1246='Fjell';
                        1247='Askøy';
                        1248='Laksevåg';
                        1249='Fana';
                        1250='Haus';
                        1251='Bruvik';
                        1252='Modalen';
                        1253='Hosanger';
                        1254='Hamre';
                        1255='Åsane';
                        1257='Alversund';
                        1258='Herdla';
                        1259='Hjelme';
                        1261='Manger';
                        1263='Lindås';
                        1264='Austrheim';
                        1266='Masfjorden';
                        1301='Bergen';
                        1401='Florø';
                        1411='Gulen';
                        1412='Solund';
                        1413='Hyllestad';
                        1414='Brekke';
                        1415='Lavik';
                        1416='Kyrkjebø';
                        1417='Vik';
                        1418='Balestrand';
                        1419='Leikanger';
                        1420='Sogndal';
                        1421='Aurland';
                        1422='Lærdal';
                        1423='Borgund';
                        1424='Årdal';
                        1425='Hafslo';
                        1426='Luster';
                        1427='Jostedal';
                        1428='Askvoll';
                        1429='Fjaler';
                        1430='Gaular';
                        1431='Jølster';
                        1432='Førde';
                        1433='Naustdal';
                        1434='Vevring';
                        1437='Kinn';
                        1438='Bremanger';
                        1439='Sør-Vågsøy';
                        1440='Nord-Vågsøy';
                        1441='Selje';
                        1442='Davik';
                        1443='Eid';
                        1444='Hornindal';
                        1445='Gloppen';
                        1446='Breim';
                        1447='Innvik';
                        1448='Stryn';
                        1501='Ålesund';
                        1502='Molde';
                        1503='Kristiansund';
                        1511='Vanylven';
                        1512='Syvde';
                        1513='Rovde';
                        1514='Sande';
                        1515='Herøy';
                        1516='Ulstein';
                        1517='Hareid';
                        1519='Volda';
                        1520='Ørsta';
                        1521='Vartdal';
                        1522='Hjørundfjord';
                        1523='Sunnylven';
                        1524='Norddal';
                        1525='Stranda';
                        1526='Stordal';
                        1527='Ørskog';
                        1528='Sykkylven';
                        1529='Skodje';
                        1530='Vatne';
                        1531='Borgund';
                        1532='Giske';
                        1533='Vigra';
                        1534='Haram';
                        1535='Vestnes';
                        1536='Tresfjord';
                        1537='Voll';
                        1538='Eid';
                        1539='Grytten';
                        1540='Hen';
                        1541='Veøy';
                        1542='Eresfjord og Vistdal';
                        1543='Nesset';
                        1544='Bolsøy';
                        1545='Akerø';
                        1546='Sandøy';
                        1548='Fræna';
                        1549='Bud';
                        1550='Hustad';
                        1551='Eide';
                        1552='Kornstad';
                        1553='Kvernes';
                        1554='Bremsnes';
                        1555='Grip';
                        1556='Frei';
                        1557='Gjemnes';
                        1558='Øre';
                        1559='Straumsnes';
                        1560='Tingvoll';
                        1561='Øksendal';
                        1562='Ålvundeid';
                        1563='Sunndal';
                        1564='Stangvik';
                        1565='Åsskard';
                        1566='Surnadal';
                        1567='Rindal';
                        1568='Stemshaug';
                        1569='Aure';
                        1570='Valsøyfjord';
                        1571='Halsa';
                        1572='Tustna';
                        1573='Edøy';
                        1574='Brattvær';
                        1575='Hopen';
                        1601='Trondheim';
                        1612='Hemne';
                        1614='Heim';
                        1615='Sandstad';
                        1616='Fillan';
                        1617='Hitra';
                        1618='Kvenvær';
                        1619='Sør-Frøya';
                        1620='Nord-Frøya';
                        1621='Ørland';
                        1622='Agdenes';
                        1623='Lensvik';
                        1624='Rissa';
                        1625='Stadsbygd';
                        1626='Stjørna';
                        1627='Bjugn';
                        1628='Nes';
                        1629='Jøssund';
                        1630='Aa';
                        1631='Stoksund';
                        1632='Roan';
                        1633='Osen';
                        1634='Opdal';
                        1635='Rennebu';
                        1636='Meldal';
                        1638='Orkdal';
                        1640='Røros';
                        1644='Ålen';
                        1645='Haltdalen';
                        1646='Singsås';
                        1647='Budal';
                        1648='Støren';
                        1649='Soknedal';
                        1650='Horg';
                        1651='Hølandet';
                        1652='Flå';
                        1653='Melhus';
                        1654='Leinstrand';
                        1655='Byneset';
                        1656='Buvik';
                        1657='Børseskogen';
                        1658='Børsa';
                        1659='Geitastrand';
                        1660='Strinda';
                        1661='Tiller';
                        1662='Klæbu';
                        1663='Malvik';
                        1664='Selbu';
                        1665='Tydal';
                        1701='Levanger';
                        1702='Steinkjer';
                        1703='Namsos';
                        1711='Meråker';
                        1712='Hegra';
                        1713='Lånke';
                        1714='Stjørdal';
                        1715='Skatval';
                        1716='Åsen';
                        1717='Frosta';
                        1718='Leksvik';
                        1719='Skogn';
                        1720='Frol';
                        1721='Verdal';
                        1722='Ytterøy';
                        1723='Mosvik';
                        1724='Verran';
                        1725='Namdalseid';
                        1726='Malm';
                        1727='Beitstad';
                        1728='Sandvollan';
                        1729='Inderøy';
                        1730='Røra';
                        1731='Sparbu';
                        1732='Ogndal';
                        1733='Egge';
                        1734='Stod';
                        1735='Kvam';
                        1736='Snåsa';
                        1737='Sørli';
                        1738='Nordli';
                        1742='Grong';
                        1743='Høylandet';
                        1744='Overhalla';
                        1745='Vemundvik';
                        1746='Klinga';
                        1747='Otterøy';
                        1748='Fosnes';
                        1749='Flatanger';
                        1750='Vikna';
                        1751='Nærøy';
                        1752='Kolvereid';
                        1753='Foldereid';
                        1754='Gravvik';
                        1755='Leka';
                        1802='Mosjøen';
                        1804='Bodø';
                        1805='Narvik';
                        1806='Svolvær';
                        1811='Bindal';
                        1812='Vik';
                        1813='Velfjord';
                        1814='Brønnøy';
                        1815='Vega';
                        1816='Vevelstad';
                        1817='Tjøtta';
                        1818='Herøy';
                        1819='Nordvik';
                        1820='Alstahaug';
                        1821='Stamnes';
                        1822='Leirfjord';
                        1824='Vefsn';
                        1826='Hattfjelldal';
                        1827='Dønnes';
                        1828='Nesna';
                        1830='Korgen';
                        1832='Hemnes';
                        1833='Nord-Rana';
                        1834='Lurøy';
                        1835='Træna';
                        1836='Rødøy';
                        1837='Meløy';
                        1838='Gildeskål';
                        1839='Beiarn';
                        1840='Saltdal';
                        1841='Fauske';
                        1842='Skjerstad';
                        1843='Bodin';
                        1844='Kjerringøy';
                        1845='Sørfold';
                        1846='Nordfold';
                        1847='Leiranger';
                        1848='Steigen';
                        1849='Hamarøy';
                        1850='Tysfjord';
                        1851='Lødingen';
                        1852='Tjeldsund';
                        1853='Evenes';
                        1855='Ankenes';
                        1857='Værøy';
                        1858='Moskenes';
                        1859='Flakstad';
                        1860='Buksnes';
                        1861='Hol';
                        1862='Borge';
                        1864='Gimsøy';
                        1865='Vågan';
                        1866='Hadsel';
                        1867='Bø';
                        1868='Øksnes';
                        1869='Langenes';
                        1870='Sortland';
                        1872='Dverberg';
                        1901='Harstad';
                        1902='Tromsø';
                        1911='Kvæfjord';
                        1914='Trondenes';
                        1915='Bjarkøy';
                        1917='Ibestad';
                        1920='Lavangen';
                        1921='Salangen';
                        1922='Bardu';
                        1924='Målselv';
                        1925='Sørreisa';
                        1926='Dyrøy';
                        1927='Tranøy';
                        1928='Torsken';
                        1929='Berg';
                        1930='Hillesøy';
                        1931='Lenvik';
                        1932='Malangen';
                        1933='Balsfjord';
                        1934='Tromsøysund';
                        1935='Helgøy';
                        1936='Karlsøy';
                        1937='Sørfjord';
                        1938='Lyngen';
                        1941='Skjervøy';
                        1942='Nordreisa';
                        1943='Kvænangen';
                        2001='Hammerfest';
                        2002='Vardø';
                        2003='Vadsø';
                        2011='Kautokeino';
                        2012='Alta';
                        2013='Talvik';
                        2014='Loppa';
                        2015='Hasvik';
                        2016='Sørøysund';
                        2017='Kvalsund';
                        2018='Måsøy';
                        2019='Kjelvik';
                        2020='Kistrand';
                        2021='Karasjok';
                        2022='Lebesby';
                        2023='Gamvik';
                        2024='Berlevåg';
                        2025='Tana';
                        2026='Polmak';
                        2027='Nesseby';
                        2028='Vardø Landdistrikt';
                        2029='Nord-Varanger';
                        2030='Sør-Varanger'")]


#### Counties ####

# Exploratory look at the municipality codes and the county codes they imply.
unique(dt[["muni"]])
sort(unique(as.numeric(gsub("(.*)\\d{2}", "\\1", dt[["muni"]]))))

# County code: the municipality code with its last two digits stripped.
dt[, county := as.numeric(gsub(pattern = "(.*)\\d{2}",
                               replacement = "\\1",
                               x = muni))]

# Cross-tabulate municipality against derived county as a check.
table(dt[["muni"]], dt[["county"]])

# County names, recoded from the numeric county code.
dt[, countyName := recode(
  county,
  "01='Østfold';02='Akershus';03='Oslo';04='Hedmark';05='Opland';06='Buskerud';07='Vestfold';08='Telemark';09='Aust-Agder';10='Vest-Agder';11='Rogaland';12='Hordaland';13='Bergen';14='Sogn og Fjordane';15='Møre og Romsdal';16='Sør-Trøndelag';17='Nord-Trøndelag';18='Nordland';19='Troms';20='Finnmark'"
)]


#### Urban ####

# urban: 1 when the second-to-last digit of the municipality code is 0,
# otherwise 0.
dt[, urban := {
  second_last_digit <- as.numeric(str_sub(muni, start = -2, end = -2))
  ifelse(test = second_last_digit == 0, yes = 1, no = 0)
}]


#### ELECTORAL VARIABLES AND SAMPLE ####

#### Eligible voters: total, women, men, share female ####

### elVoters_T: eligible voters, total (used as provided)
### log.elVoters_T: natural log of elVoters_T; a log of -Inf (zero eligible
### voters) is recoded to missing

dt[, log.elVoters_T := log(elVoters_T)]
dt[, log.elVoters_T := ifelse(test = log.elVoters_T == -Inf,
                              yes = NA,
                              no = log.elVoters_T)]

### elVoters_W: eligible voters, women
### Women didn't have the right to vote in 1898, so that year is set to 0

dt[, elVoters_W := ifelse(test = year == 1898,
                          yes = 0,
                          no = elVoters_W)]

### elVoters_M: eligible voters, men
### For 1898, 1934 and 1937 computed as total minus women

dt[, elVoters_M := ifelse(test = year %in% c(1898, 1934, 1937),
                          yes = elVoters_T - elVoters_W,
                          no = elVoters_M)]

### relElVoters_W: women's percentage of eligible voters (women + men)

dt[, relElVoters_W := 100 * (elVoters_W / (elVoters_W + elVoters_M))]

#### Votes: total, women, men, share female ####

### votes_T: votes, total
### Approved votes (votesAppr_T) are available for 1898, 1901 and 1937;
### they only need to be used for 1898.

table(dt[["year"]], !is.na(dt[["votes_T"]]))
table(dt[["year"]], !is.na(dt[["votesAppr_T"]]))
dt[, votes_T := ifelse(test = year == 1898,
                       yes = votesAppr_T,
                       no = votes_T)]

### votes_W: votes, women (set to 0 for 1898)

table(dt[["year"]], !is.na(dt[["votes_W"]]))
dt[, votes_W := ifelse(test = year == 1898,
                       yes = 0,
                       no = votes_W)]

### votes_M: votes, men
### 1898, 1901 and 1937 need fixing: computed as total minus women

table(dt[["year"]], !is.na(dt[["votes_M"]]))
dt[, votes_M := ifelse(test = year %in% c(1898, 1901, 1937),
                       yes = votes_T - votes_W,
                       no = votes_M)]

### relTurnout_W: women's fraction of all votes cast (0 for 1898)

dt[, relTurnout_W := ifelse(test = year == 1898,
                            yes = 0,
                            no = votes_W / votes_T)]


#### Turnout: total, women, men, female-to-male ratio ####

### turnout: overall turnout in percent; missing when either input is missing

dt[, turnout := ifelse(test = is.na(votes_T) | is.na(elVoters_T),
                       yes = NA,
                       no = 100 * (votes_T / elVoters_T))]

### turnout_W: female turnout in percent; 0 where no women were eligible

dt[, turnout_W := ifelse(
  test = is.na(votes_W) | is.na(elVoters_W),
  yes = NA,
  no = ifelse(test = elVoters_W == 0,
              yes = 0,
              no = 100 * (votes_W / elVoters_W))
)]

### turnout_M: male turnout in percent

dt[, turnout_M := ifelse(test = is.na(votes_M) | is.na(elVoters_M),
                         yes = NA,
                         no = 100 * (votes_M / elVoters_M))]

# List the municipality-years where male turnout exceeds 100%.
dplyr::select(
  dplyr::filter(dt, turnout_M > 100),
  muni, year, elVoters_M, elVoters_W, votes_M, votes_W
)
# Four municipality-years:
#    muni year elVoters_M elVoters_W votes_M votes_W
# 1: 2028 1901        182         94     193      15
# 2:  424 1910        626        610     644     265
# 3: 1526 1919        208        251     223      62
# 4: 1439 1937        626        789     689     418
# The electoral statistics reports show the same,
# e.g. SSB (1920) shows the same numbers for these municipalities

# Cap male turnout above 100% at 100.
dt[, turnout_M := ifelse(test = turnout_M > 100,
                         yes = 100,
                         no = turnout_M)]

### turnoutRatio: female-to-male turnout ratio; missing where there are no
### eligible voters at all

dt[, turnoutRatio := ifelse(test = elVoters_T == 0,
                            yes = NA,
                            no = turnout_W / turnout_M)]


#################################################################################
# THIS LINE IS THE FIRST MODIFICATION MADE TO THE ORIGINAL FILE.              ###
# THE ORIGINAL FILE IS NAMED "data_main_03_merging_and_creating_variables.r"  ###
# AND IS IN THE "data" FOLDER IN THE ORIGINAL DATAVERSE.                      ###
# THE LINE CREATES THE GENDER TURNOUT GAP USED TO RUN THE ALTERNATIVE         ###
# ANALYSES.                                                                   ###
#                                                                             ###
dt[, turnoutGap := ifelse(elVoters_T==0,NA,turnout_W - turnout_M)]            ###
#################################################################################

#### Representatives in municipal councils ####

### reps_T: total representatives

# already defined

### log.reps_T: natural log of reps_T

# log(0) evaluates to -Inf; such entries are stored as NA instead
dt[, log.reps_T := {
  logged.reps <- log(reps_T)
  ifelse(logged.reps == -Inf, NA, logged.reps)
}]

#### Women's representation ####

### reps_W_pct: percentage women in municipal council ###
### reps_W_bi: any women in municipal council ###

# Both columns are fixed at 0 for 1898 and are NA where reps_W is missing.
# They do not depend on each other, so they are created in a single call.
dt[, `:=`(
  reps_W_pct = ifelse(
    year == 1898,
    0,
    ifelse(is.na(reps_W), NA, (reps_W / reps_T) * 100)
  ),
  reps_W_bi = ifelse(
    year == 1898,
    0,
    ifelse(is.na(reps_W), NA, ifelse(reps_W > 0, 1, 0))
  )
)]


#### Sample ####

# Municipalities that exist in 1919,
# that are observed between 1913/1922 OR 1910/1928,
# and that didn't switch to PR after 1913 OR after 1901

### Municipalities observed in 1910, 1913 and 1916

# Used for analysis of voluntary switches to PR between 1913 and 1916.
# Step 1: mark every row whose year lies strictly between 1909 and 1917.
dt[, munisObs1910to1916 := ifelse(year > 1909 & year < 1917, 1, 0)]
# Step 2: count the marked rows of each municipality in a scratch column.
dt[, obs1910to1916 := sum(munisObs1910to1916, na.rm = TRUE), by = "muni"]
# Step 3: the flag becomes 1 only where that count is exactly 3. The
# comparison is row-wise, so no grouping is needed here.
dt[, munisObs1910to1916 := ifelse(obs1910to1916 == 3, 1, 0)]
# Step 4: drop the scratch column again.
dt[, obs1910to1916 := NULL]


#### Electoral system ####

### Cleaning

# pr_code  (1898, 1901): values above 1 become NA
# pr_code2 (1913, 1916): maj_code with 0 and 1 swapped, other values kept
dt[, `:=`(
  pr_code = ifelse(pr_code > 1, NA, pr_code),
  pr_code2 = ifelse(
    maj_code == 1,
    0,
    ifelse(maj_code == 0, 1, maj_code)
  )
)]

# dt[, pr := ifelse(year %in% c(1898,1901),pr_code,
#                   ifelse(year %in% c(1913,1916),pr_code2,
#                          ifelse(year>1918,1,NA)))]

### prObserved: PR variable with observed years and otherwise NA

# dt[, prObserved := ifelse(year %in% c(1898,1901),pr_code,
#                           ifelse(year %in% c(1913,1916),pr_code2,NA))]

# pr takes pr_code in 1898/1901, pr_code2 in 1913/1916 and is NA in all
# other years
dt[, pr := ifelse(
  year %in% c(1898, 1901),
  pr_code,
  ifelse(year %in% c(1913, 1916), pr_code2, NA)
)]

### PR in various years

# Per municipality: the mean of pr over that municipality's rows for the
# given year, repeated on every row of the municipality
dt[, `:=`(
  pr98 = mean(pr[year == 1898], na.rm = TRUE),
  pr01 = mean(pr[year == 1901], na.rm = TRUE),
  pr13 = mean(pr[year == 1913], na.rm = TRUE),
  pr16 = mean(pr[year == 1916], na.rm = TRUE)
), by = muni]

### prORmaj13to16: not switched to PR between 1913 and 1916

# 1 when pr13 and pr16 are both 1 or both 0, otherwise 0
dt[, prORmaj13to16 := ifelse(
  (pr13 == 1 & pr16 == 1) | (pr13 == 0 & pr16 == 0),
  1,
  0
)]

### prFrom10: PR variable from 1910 (and did not switch between 1913 and 1916)

# Main PR variable, which gives comparison from 1910 for Figure 1 in the MS.
# Only rows with prORmaj13to16 == 1 are filled; a missing pr is replaced by pr13.
dt[prORmaj13to16 == 1, prFrom10 := ifelse(is.na(pr), pr13, pr)]


#### CENSUS VARIABLES ####

dt.census <- data.table(dt.census)

#### Preparing population data ####

# For 1930 rows the women's count is the sum of the two female age groups
dt.census[
  year == 1930,
  antall_hjemmehrende_kvinner := kvinner_under_15_r + kvinner_15_r_eller_eldre
]
# missing 1930, which is correct
na.omit(dt.census[, summary(antall_hjemmehrende_kvinner)[7], by = year])

setnames(
  dt.census,
  old = c("antall_hjemmehrende_personer_ialt", "antall_hjemmehrende_kvinner"),
  new = c("pop", "pop_w")
)

#### Population ####

# men = total minus women; log.pop = natural log of the total population
dt.census[, `:=`(
  pop_m = pop - pop_w,
  log.pop = log(pop)
)]

#### Population age 20 or older, by gender ####

# 1910 uses different source variables (four age bands from 20 upwards);
# every other year adds the 20-24 and 25+ groups
dt.census[, `:=`(
  popM20 = ifelse(
    year == 1910,
    menn_20_29_r + menn_30_49_r + menn_50_69_r + menn_70_r_eller_eldre,
    menn_20_24_r + menn_25_r_eller_eldre
  ),
  popW20 = ifelse(
    year == 1910,
    kvinner_20_29_r + kvinner_30_49_r + kvinner_50_69_r + kvinner_70_r_eller_eldre,
    kvinner_20_24_r + kvinner_25_r_eller_eldre
  )
)]

dt.census[, pop20 := popM20 + popW20]

#### Population age 25 or older, by gender ####

dt.census[, `:=`(
  popM25 = menn_25_r_eller_eldre,
  popW25 = kvinner_25_r_eller_eldre,
  pop25 = menn_25_r_eller_eldre + kvinner_25_r_eller_eldre
)]

### Keep variables

# Retain the identifiers plus every column whose name contains one of the
# patterns "occup_", "pop", "rel_" or "poor" (in that order)
names <- names(dt.census)
keep.vars <- c(
  "komnr", "year",
  grep("occup_", names, value = TRUE),
  grep("pop", names, value = TRUE),
  grep("rel_", names, value = TRUE),
  grep("poor", names, value = TRUE)
)
dt.census <- dt.census[, keep.vars, with = FALSE]

#### Percentage women in the population ####

dt.census[, relPop_W := (pop_w / pop) * 100]

#### Percentage employed in different employment categories #####

# occup_tot is the sum of the four categories; each "<category>Pct" column is
# that category as a percentage of occup_tot
dt.census[, occup_tot := (occup_agri + occup_indu + occup_service + occup_sea)]
cols <- c("occup_agri", "occup_indu", "occup_service", "occup_sea")
newcols <- paste0(cols, "Pct")
dt.census[
  ,
  (newcols) := lapply(.SD, function(x) (x / dt.census$occup_tot) * 100),
  .SDcols = cols
]

#### Percentage Dissenters in the population ####

dt.census[, rel_dissenterPct := (rel_dissenter / pop) * 100]

#### Percentage of population on poor relief ####

dt.census[, poorReliefPct := (poorRelief / pop) * 100]

#### Interpolation ####

# Linear interpolation between census years (1891, 1900, 1910, 1920, 1930, 1946)
# to get estimates for each election year.

### Creating yearly data set

# One row per municipality (komnr) and calendar year 1891-1946, ordered by komnr
dt.komnr.yr <- expand.grid(
  komnr = sort(unique(dt.census$komnr)),
  year = 1891:1946
) %>%
  arrange(komnr)

# Full outer merge: years without census data are kept as rows
dt.census.i <- merge(
  x = dt.census,
  y = dt.komnr.yr,
  by = c("komnr", "year"),
  all = TRUE
)

### Linear interpolation function

# Linearly interpolates missing values in a vector.
#
# x: a vector with NAs marking the values to fill.
# Returns x unchanged when it has fewer than two non-missing values;
# otherwise returns the result of zoo::na.approx(x, na.rm = FALSE).
# na.rm = FALSE asks na.approx to keep NAs it cannot fill instead of
# dropping them.
#
# na.approx is called with its namespace so the function does not depend on
# zoo being attached with library(zoo); the package only has to be installed.
na.interpol <- function(x) {
  if (sum(!is.na(x)) < 2) {
    return(x)
  }
  zoo::na.approx(x, na.rm = FALSE)
}

### Employing the function

dt.census.i <- data.table(dt.census.i)
# every column except the municipality id is cast to numeric ...
cols <- names(dt.census.i)[names(dt.census.i) %notin% c("komnr")]
dt.census.i[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
# ... and then interpolated separately within each municipality
dt.census.i <- dt.census.i[
  ,
  lapply(.SD, na.interpol),
  by = komnr,
  .SDcols = cols
]

#### Merging with main data ####

# the census id is renamed to match the key of the main data; the left merge
# keeps every row of dt
setnames(dt.census.i, old = "komnr", new = "muni")
dt <- merge(
  x = dt,
  y = dt.census.i,
  by = c("muni", "year"),
  all.x = TRUE
)

### elVotersSharePop: share of population eligible to vote

# computed here because it needs both the population and the electoral data
dt[, elVotersSharePop := (elVoters_T / pop)]

#### AREA ####

#### Interpolation ####

# Linear interpolation between census years (1891, 1900, 1910, 1920, 1930, 1946)

### Creating yearly data set

# One row per municipality and calendar year 1891-1946, ordered by muni
dt.muni.yr <- expand.grid(
  muni = sort(unique(dt.area$muni)),
  year = 1891:1946
) %>%
  arrange(muni)

dt.area.i <- merge(
  x = dt.area,
  y = dt.muni.yr,
  by = c("muni", "year"),
  all = TRUE
)

### Employing the interpolation function

dt.area.i <- data.table(dt.area.i)
# every column except the municipality id is cast to numeric, then
# interpolated separately within each municipality
cols <- names(dt.area.i)[names(dt.area.i) %notin% c("muni")]
dt.area.i[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
dt.area.i <- dt.area.i[
  ,
  lapply(.SD, na.interpol),
  by = muni,
  .SDcols = cols
]

#### Area; population density ####

# Bring in the interpolated population from dt.census.i
dt.area.i <- merge(
  x = dt.area.i,
  y = dt.census.i[, .(muni, year, pop)],
  by = c("muni", "year"),
  all.x = TRUE
)

# log.area and popDens are independent of each other; log.popDens needs
# popDens, so it follows in its own call
dt.area.i[, `:=`(
  log.area = log(area),
  popDens = pop / area
)]
dt.area.i[, log.popDens := log(popDens)]
# pop is already present in dt, so it is removed before the merge below
dt.area.i[, pop := NULL]

dt <- merge(
  x = dt,
  y = dt.area.i,
  by = c("muni", "year"),
  all.x = TRUE
)

######
# 


#### CLEANING: KEEPING VARIABLES AND RENAME WITHOUT UNDERSCORES ####

# Each underscore followed by a letter is replaced by that letter in upper
# case, e.g. elVoters_T -> elVotersT
setnames(
  dt,
  old = names(dt),
  new = gsub("\\_([a-zA-Z])", "\\U\\1", names(dt), perl = TRUE)
)

keep.vars <- c(
  "muni", "year",
  "relTurnoutW", "turnoutRatio", "turnoutGap", "turnoutW", "turnout",
  "pr", "pr98", "pr01", "pr13", "pr16", "prORmaj13to16",
  "munisObs1910to1916",
  "elVotersT", "log.elVotersT", "relElVotersW", "repsT", "log.repsT",
  "popW", "popM", "pop", "log.pop",
  "area", "log.area", "popDens", "log.popDens", "relPopW",
  "occupInduPct", "occupServicePct", "occupSeaPct", "occupAgriPct",
  "relDissenterPct",
  "repsWPct", "repsWBi", "elVotersSharePop", "poorReliefPct"
)

# Keep only the listed columns and the years included in the analysis
dt <- dt[year %in% 1907:1928, keep.vars, with = FALSE]

#### WRITING DATA ####

write.table(
  x = dt,
  file = here("data_main.csv"),
  sep = ",",
  row.names = FALSE,
  col.names = TRUE
)
